# delimit ;
* From here on every statement, including star comments, ends with a semicolon;

* Start from an empty session: no data, matrices, programs or open log;
clear;
clear matrix;
set mem 300m;
set more 1;
drop _all;
program drop _all;
capture log close;

* Fix the random-number seed so the runiform() draws made later in this file are repeatable;
set seed 25;

* Earlier file locations, kept for reference;
*log using C:\Dropbox\hrs\wealthcouples\dataprep_couples2.log, replace ;
*use C:\Dropbox\hrs\wealthcouples\dataprep1;

* Open the log for this run and load the input data set;
log using "C:\Users\ebf26\Dropbox\hrs\wealthcouples\dataprep_couples2.log", replace;
use "C:\Users\ebf26\Dropbox\hrs\wealthcouples\dataprep1";

/*keep HHID PN realyear wave *medcost* *dead* adist* cohort* ass* age age_wife agej* died dead age1 age2 age3 age4 age5 male female
PI* Pip* indnum married time* lhhinc hhinc lanninc  lsocy FE lfpr estate* house* firstass *marstat heal
re wheels liquid IRA stock bus  bonds exasset debts remort house mort trust firsthh children death* ins* drugc* *oop* *iprem* nurs* hos* drt* estateflag hhmedicaid anninc tc rtc *wgt* SSDIY SSIinc
ageshift* sing* bPI* hhage* hhstatus *mandied *manheal *mannursing pi_perc* amsp* medicaid *medicaid_pay* firstwave;*/

* Restrict the working file to the variables and wildcard patterns listed below;
* The entries are unchanged and in their previous order - hhmedicaid and the death wildcard each appear twice;
keep
    HHID PN realyear wave hhmedcost_b *dead* adist* cohort* ass*
    age age_wife agej* died dead age1 age2 age3 age4 age5 male female
    PI* Pip* indnum married time* lhhinc hhinc lanninc lsocy FE lfpr
    estate* house* firstass *marstat heal
    re wheels liquid IRA stock bus bonds exasset debts remort house mort trust
    firsthh children death* ins* drugc* *oop* *iprem* nurs* hos* drt*
    estateflag hhmedicaid anninc tc rtc *wgt* SSDIY SSIinc
    ageshift* sing* bPI* hhage* hhstatus *mandied *manheal *manINC *mannursing
    pi_perc* amsp* hhmedicaid hhmedicaid_pay firstwave death* manadl womanadl;

* Descriptives for SSIinc and SSDIY, and a simple regression of one on the other;
summarize SSIinc SSDIY;
regress SSIinc SSDIY;

/* Indicator versions of the two amounts:
     0 when the amount is exactly zero,
     1 when it is positive and not the system missing value,
     missing otherwise (for example a negative amount).
   NOTE(review): the test against . does not exclude extended missing values
   (.a, .b, ...), so such values would be coded 1 - confirm none are present. */
gen SS1 = cond(SSIinc==0, 0, cond(SSIinc>0 & SSIinc!=., 1, .));
gen SS2 = cond(SSDIY==0, 0, cond(SSDIY>0 & SSDIY!=., 1, .));

* Cross-tabulate the two indicators against each other and against hhmedicaid;
tabulate SS1 SS2;
tabulate SS1 hhmedicaid;
tabulate SS2 hhmedicaid;


*exit;

* Give every record of a household the adist_weighted value found on the first record of that household;
* Records are ordered by PN and then wave within HHID.  The summaries before and after show the effect;
summarize adist_weighted;
bysort HHID (PN wave): replace adist_weighted = adist_weighted[1];
summarize adist_weighted;
*exit;


* Summary statistics of the variables whose names start with nur, separately for each value of dead;
bysort dead: summarize nur*;
*exit;

sort indnum wave;


/* died should equal 1 only once per person.  On any record where the person
   is coded dead and was already coded dead on the previous record of the
   same indnum, died is reset to missing. */
by indnum: replace died = . if _n>1 & dead==1 & dead[_n-1]==1;

* Health code 6 is assigned to every record on which the person is dead;
replace heal = 6 if dead==1;


/* Summary of heal, dead, died variables:
If heal is nonmissing: dead=0 & died=0 

Heal is 6 if you just died: died=1 & heal=6

If heal is missing, one of the following is true:
-you're dead and we're past the death event: dead=1 & died=. 
-you're alive (dead=0), were interviewed, but didn't give an answer for health status (11 observations, excluding wave2)
-you couldn't be found in an interview year, but were alive in subsequent periods. heal=. and dead was recoded to 0  presumably by Eric French (627 observations)
-you couldn't be found and were never subsequently found (252 observations, 107 individuals)
*/

* Home equity plus vehicles: house plus wheels minus mort;
gen homeeq = house + wheels - mort;

/* Amount of the estate not going to the spouse (estate minus amspouse).
   Super-outliers are dropped: the value is left missing when either input
   exceeds 9999999 (which also covers missing inputs, since missing compares
   as larger than any number).  Negative differences are then set to zero. */
gen amothers = estate - amspouse if estate<=9999999 & amspouse<=9999999;
replace amothers = 0 if amothers<0;


/* Recode current health status of each spouse into a small set of states.
   Basic measure (written back into manheal and womanheal):
       3=good  2=bad  1=nursing home  0=dead
   ADL measure (manhealadl and womanhealadl):
       4=good  3=bad  2=two or more ADLs  1=nursing home  0=dead
   Within each measure the later replace statements win, so death overrides
   nursing home, which overrides ADL status, which overrides reported health. */
gen trueheal = heal;

* Top-code nights in a nursing home at 365 for each spouse;
summarize nursing;
foreach sp in man woman {;
    replace `sp'nursing = 365 if `sp'nursing>365 & `sp'nursing!=.;
};
summarize mannursing womannursing;

* code someone as being in a nursing home if they are in a nursing home next year and die next year;
*replace nursing=65 if nursing>30 & nursing<65;

foreach sp in man woman {;

    * Basic measure, built in a scratch variable from the reported 1-5 health codes;
    gen `sp'temp = .;
    replace `sp'temp = 3 if inlist(`sp'heal, 1, 2, 3);
    replace `sp'temp = 2 if inlist(`sp'heal, 4, 5);
    replace `sp'temp = 1 if `sp'nursing>60 & `sp'nursing<.;
    replace `sp'temp = 0 if `sp'died==1;

    * ADL measure, built hierarchically so the ADL state only applies to those without a nursing home stay;
    gen `sp'tempadl = .;
    replace `sp'tempadl = 4 if inlist(`sp'heal, 1, 2, 3);
    replace `sp'tempadl = 3 if inlist(`sp'heal, 4, 5);
    replace `sp'tempadl = 2 if `sp'adl>=2 & `sp'adl!=.;
    replace `sp'tempadl = 1 if `sp'nursing>60 & `sp'nursing<.;
    replace `sp'tempadl = 0 if `sp'died==1;

    * Both scratch variables must exist before this point because the reported health variable is overwritten here;
    replace `sp'heal = `sp'temp;
    gen `sp'healadl = `sp'tempadl;
};

* Compare the ADL measure with the basic measure for each spouse;
tabulate womanhealadl womanheal;

tabulate manhealadl manheal;

/* Nursing-home-before-death adjustment.
   A spouse is flagged (mannursndead / womannursndead = 1) on a record where
   he or she is alive (dead==0) and where, on the NEXT record of the same
   household, he or she is dead and has more than 60 non-missing nursing
   home nights.  Flagged records are assigned health state 1 (nursing home)
   in both the basic measure (healmeas2) and the ADL measure (healmeas3).

   The look-ahead is evaluated by HHID.  Without that restriction the last
   record of one household would read the first record of the next household
   and could be flagged from another household's data.

   NOTE(review): an earlier description of this step also required fewer than
   120 nursing home days over the previous 2 years; no such condition is
   applied in the code below - confirm whether it is still wanted. */

*added code to account for ADL status;
sort HHID wave;

by HHID: gen mannursndead = (mandead==0 & mandead[_n+1]==1 & mannursing[_n+1]>60 & mannursing[_n+1]<.);
gen manhealmeas2 = manheal;
replace manhealmeas2 = 1 if manhealmeas2~=1 & mannursndead==1;
tab manheal;
gen manhealmeas3 = manhealadl;
replace manhealmeas3 = 1 if manhealmeas3~=1 & mannursndead==1;

by HHID: gen womannursndead = (womandead==0 & womandead[_n+1]==1 & womannursing[_n+1]>60 & womannursing[_n+1]<.);
gen womanhealmeas2 = womanheal;
replace womanhealmeas2 = 1 if womanhealmeas2~=1 & womannursndead==1;
gen womanhealmeas3 = womanhealadl;
replace womanhealmeas3 = 1 if womanhealmeas3~=1 & womannursndead==1;
tab womanheal;

* decide which health measure to use: the adjusted versions replace the working variables;
replace manheal = manhealmeas2;
replace womanheal = womanhealmeas2;

replace manhealadl = manhealmeas3;
replace womanhealadl = womanhealmeas3;


* children: when the value is system missing, carry forward the value from the previous record of the same household;
sort HHID wave;
by HHID: replace children = children[_n-1] if children==. & children[_n-1]!=.;


* Indicators: couple when hhstatus is 3, child when children is positive and not system missing;
gen couple = (hhstatus==3);
gen child = (children>0 & children!=.);

order HHID wave *heal *healmeas2 *nursing;

sort HHID wave;

* lagged variables: value from the previous record of the same household, missing on the first record;
foreach v in age age_wife hhage couple manheal womanheal manhealadl womanhealadl {;
    by HHID: gen l`v' = `v'[_n-1];
};

* Lagged age for the woman: lage_wife when firsthh is 3, otherwise lage when male is 0, otherwise missing;
gen lage_w = cond(firsthh==3, lage_wife, cond(male==0, lage, .));

/* Impute lmanheal and lwomanheal when missing on the record where the spouse
   died and the lag was not already set to 1 via nursndead.

   Donors     : death records whose lagged health is 2 or 3.
   Recipients : death records whose lagged health is system missing.
   A logit for lagged health == 3 is fitted on donors; each recipient draws a
   uniform number and is assigned 3 when the draw is below the predicted
   probability and 2 otherwise.  The imputed lag is then written back into
   the current-health variable of the previous record of the same household.

   Recipients with no predicted probability (a covariate is missing) are left
   missing.  Missing values compare as larger than any number, so a bare
   rand<lhealprob test is true for them and would code every one of them
   as 3 without any random draw being involved.

   The data must still be sorted by HHID and wave when this runs. */

* Men;
gen donor = (mandied==1 & lmanheal>1 & lmanheal<.);
gen recip = (mandied==1 & lmanheal==.);
gen lmanhealbin = (lmanheal==3);
logit lmanhealbin PI lage lcouple if donor==1;
predict lhealprob if recip==1;
by HHID: gen rand = runiform();
replace lmanheal = cond(rand<lhealprob, 3, 2) if recip==1 & lhealprob<.;
replace manheal = lmanheal[_n+1] if HHID==HHID[_n+1];
drop donor recip lmanhealbin lhealprob rand;

* Women: same procedure, using the lagged age of the woman;
gen donor = (womandied==1 & lwomanheal>1 & lwomanheal<.);
gen recip = (womandied==1 & lwomanheal==.);
gen lwomanhealbin = (lwomanheal==3);
logit lwomanhealbin PI lage_w lcouple if donor==1;
predict lhealprob if recip==1;
by HHID: gen rand = runiform();
replace lwomanheal = cond(rand<lhealprob, 3, 2) if recip==1 & lhealprob<.;
replace womanheal = lwomanheal[_n+1] if HHID==HHID[_n+1];
drop donor recip lwomanhealbin lhealprob rand;

/* Impute lmanheal and lwomanheal when missing on a record where the spouse is
   in health state 1 (nursing home, possibly set via nursndead).

   Donors     : state-1 records whose lagged health is 1, 2 or 3.
   Recipients : state-1 records whose lagged health is system missing.
   Two separate logits give the probabilities of lagged state 1 and lagged
   state 2; lhealprob2 is then turned into the cumulative probability of
   state 1 or 2.  One uniform draw per record picks the state:
       draw below lhealprob1                -> 1
       draw below the cumulative lhealprob2 -> 2
       otherwise                            -> 3
   The imputed lag is written back into the current-health variable of the
   previous record of the same household.

   Recipients whose probabilities are missing are left missing.  Missing
   values compare as larger than any number, so a bare rand<lhealprob1 test
   is true for them and would code every one of them as 1.

   NOTE(review): the two probabilities come from independent binary logits,
   so their sum is not constrained to be at most 1 - confirm this is
   acceptable or consider a multinomial model.

   The data must still be sorted by HHID and wave when this runs. */

* Men;
gen donor = (manheal==1 & lmanheal>0 & lmanheal<.);
gen recip = (manheal==1 & lmanheal==.);
gen lmanhealbin1 = (lmanheal==1);
gen lmanhealbin2 = (lmanheal==2);
logit lmanhealbin1 PI lage lcouple if donor==1;
predict lhealprob1 if recip==1;
logit lmanhealbin2 PI lage lcouple if donor==1;
predict lhealprob2 if recip==1;
replace lhealprob2 = lhealprob1 + lhealprob2;
by HHID: gen rand = runiform();
replace lmanheal = cond(rand<lhealprob1, 1, cond(rand<lhealprob2, 2, 3))
    if recip==1 & lhealprob1<. & lhealprob2<.;
replace manheal = lmanheal[_n+1] if HHID==HHID[_n+1];
drop donor recip lmanhealbin1 lhealprob1 lmanhealbin2 lhealprob2 rand;

* Women: same procedure, using the lagged age of the woman;
gen donor = (womanheal==1 & lwomanheal>0 & lwomanheal<.);
gen recip = (womanheal==1 & lwomanheal==.);
gen lwomanhealbin1 = (lwomanheal==1);
gen lwomanhealbin2 = (lwomanheal==2);
logit lwomanhealbin1 PI lage_w lcouple if donor==1;
predict lhealprob1 if recip==1;
logit lwomanhealbin2 PI lage_w lcouple if donor==1;
predict lhealprob2 if recip==1;
replace lhealprob2 = lhealprob1 + lhealprob2;
by HHID: gen rand = runiform();
replace lwomanheal = cond(rand<lhealprob1, 1, cond(rand<lhealprob2, 2, 3))
    if recip==1 & lhealprob1<. & lhealprob2<.;
replace womanheal = lwomanheal[_n+1] if HHID==HHID[_n+1];
drop donor recip lwomanhealbin1 lhealprob1 lwomanhealbin2 lhealprob2 rand;



/* Medical expenses: add death expenses, then bottom-code.
   - The bottom code is always applied to hhmedcost_b, which includes part b
     and insurance premia.  Doing it here avoids double bottom coding in
     wealthmat and medex.do.
   - deathex is divided by 2 before being added, because medcost is later
     multiplied by 2 to get the total over a 2 year period.
   - The floor of 100 is the preferred coding of the data and is the
     parameter to adjust when tuning the numbers.
   NOTE(review): the test against . does not exclude extended missing values
   of deathex (.a, .b, ...); for such records the sum becomes missing -
   confirm none are present. */
replace hhmedcost_b = hhmedcost_b + deathex/2 if deathex!=.;

* Censor the distribution from below: values above -1 and below 100 are raised to 100;
replace hhmedcost_b = 100 if hhmedcost_b>-1 & hhmedcost_b<100;

* Detailed summaries and correlation on records where both amounts are present;
summarize deathex hhmedcost_b if hhmedcost_b!=. & deathex!=., detail;

correlate hhmedcost_b deathex;

* Save the prepared file in old-format and close the session;
*saveold C:\Dropbox\hrs\wealthcouples\dataprep2_4states, replace;
saveold "C:\Users\ebf26\Dropbox\hrs\wealthcouples\dataprep2_4states", replace;
drop _all;
program drop _all;
log close;
